#-----------------------------------------------------------------------#
# Replication Script for Chen, Multilayer Networks in Political Science #
#-----------------------------------------------------------------------#
# NOTE: To ensure everything reproduces smoothly, please begin by running everything in section 01. 
# The code in each substantive section (A and B) can be run separately, but order within section is required.
# Four parallel processes are required to run the ERGM portions of the replication code.
# A separate document contains the session info outlining the exact package versions and operating system used to produce these results.
# While the substantive results will not differ, there will be slight differences if different package versions are used.

# Please contact me at ted.hsuanyun.chen@gmail.com with any questions.

#----------------------------#
# Contents..............line #
#----------------------------#
# 01. Preparations........29 #
# A0. ChemG...............52 #
# A1. Data Preparation....62 #
# A2. ERGM................83 #
# A3. Model Fit..........181 #
# A4. Plotting...........237 #
# B0. Global Conflict....277 #
# B1. Data Preparation...288 #
# B2. ERGM...............314 #
# B3. Model Fit..........414 #
# B4. Plotting...........501 #
#----------------------------#

#------------------#
# 01. Preparations #
#------------------#
rm(list = ls())

# Set working directory
# This is where the output files will be saved
setwd("Dropbox/00_Multilayer")

# Creating output folder
if(!dir.exists("./output")){dir.create("./output")}

# Loading packages
# Installs the "devtools" package if not already installed
if(!require("devtools")){install.packages("devtools"); library(devtools)}

# Installs ergm package version 3.10.1
install_version("ergm", version = "3.10.1", repos = "http://cran.us.r-project.org", upgrade = F)

# Installs the "multilayer.ergm" package from my Github
install_github("tedhchen/multilayer.ergm", upgrade = F)
library(multilayer.ergm)

#-----------#
# A0. ChemG #
#-----------#

# The ChemG application draws on Leifeld and Schneider, 2012, AJPS. 
# Data directly obtained from LS's replication files and slightly cleaned.
# committee, infrep, pol, prefsim, and sci are networks in matrix form; types is a vector of organization type
# See pp. 10-17 of main text for details.
# The ChemG portion of the replication code takes approximately 1 hour and 30 minutes to run using Amazon Web Service's C5 instances.

#----------------------#
# A1. Data Preparation #
#----------------------#

set.seed(804713)

# Organizing into multilayer structure. Political communication on layer 1 and scientific communication on layer 2.
nw <- cbind(rbind(pol, diag(30)), rbind(diag(30), sci))

# Into network class
nw <- network(nw, directed = T)
nw%v%"layer.mem" <- c(rep(1, 30), rep(2, 30))
nw%v%"type" <- rep(types, 2)

# Constraining the off-diagonal matrix blocks
free <- matrix(c(rep(c(rep(1, 30), rep(0, 30)), 30),
                 rep(c(rep(0, 30), rep(1, 30)), 30)),
               nrow = 60, byrow = T)
diag(free) <- 0
free <- network(free, directed = T)

#----------#
# A2. ERGM #
#----------#

# Layer independence model
mod.reduced <- ergm(nw
                    ~edgecov_layer("edges", layer = 1)
                    +gwesp_layer(decay = 0.1, fixed = T, layer = 1)
                    +gwdsp_layer(decay = 0.1, fixed = T, layer = 1)
                    +nodeifactor_layer("type", base = 2:5, layer = 1)
                    +nodeofactor_layer("type", base = 1:4, layer = 1)
                    +edgecov_layer(sci, layer = 1)
                    +edgecov_layer(committee, layer = 1)
                    +edgecov_layer(infrep, layer = 1)
                    +edgecov_layer(prefsim, layer = 1)
                    +edgecov_layer("edges", layer = 2)
                    +gwesp_layer(decay = 0.1, fixed = T, layer = 2)
                    +gwdsp_layer(decay = 0.1, fixed = T, layer = 2)
                    +nodeifactor_layer("type", base = 2:5, layer = 2)
                    +nodeofactor_layer("type", base = 1:4, layer = 2)
                    +edgecov_layer(pol, layer = 2)
                    +edgecov_layer(committee, layer = 2)
                    +edgecov_layer(infrep, layer = 2)
                    +edgecov_layer(prefsim, layer = 2)
                    +mutual(same="layer.mem", diff = T)
                    +nodemix(c("layer.mem", "type"), base = -c(12, 67)),
                    # Settings
                    eval.loglik = T, verbose = T,
                    control=control.ergm(seed = 6156713,
                                         MCMC.burnin = 20000,
                                         MCMC.samplesize = 20000,
                                         MCMC.interval = 2000,
                                         parallel = 4),
                    constraints = ~fixallbut(free))

# Model summary output
sink(file = "output/chemg_mod.reduced.summary.txt")
summary(mod.reduced)
sink()

# Model fit output
gof.reduced <- gof(mod.reduced, control = control.gof.ergm(nsim = 1000, seed = 1406018))
pdf("output/chemg_mod.reduced.gof.pdf", height = 8, width = 8)
plot(gof.reduced)
dev.off()

pdf("output/chemg_mod.reduced.mcmc.pdf", height = 8, width = 8)
mcmc.diagnostics(mod.reduced)
dev.off()

# Layer dependence model
mod.full <- ergm(nw
                 ~edgecov_layer("edges", layer = 1)
                 +gwesp_layer(decay = 0.1, fixed = T, layer = 1)
                 +gwdsp_layer(decay = 0.1, fixed = T, layer = 1)
                 +nodeifactor_layer("type", base = 2:5, layer = 1)
                 +nodeofactor_layer("type", base = 1:4, layer = 1)
                 +edgecov_layer(committee, layer = 1)
                 +edgecov_layer(infrep, layer = 1)
                 +edgecov_layer(prefsim, layer = 1)
                 +edgecov_layer("edges", layer = 2)
                 +gwesp_layer(decay = 0.1, fixed = T, layer = 2)
                 +gwdsp_layer(decay = 0.1, fixed = T, layer = 2)
                 +nodeifactor_layer("type", base = 2:5, layer = 2)
                 +nodeofactor_layer("type", base = 1:4, layer = 2)
                 +edgecov_layer(committee, layer = 2)
                 +edgecov_layer(infrep, layer = 2)
                 +edgecov_layer(prefsim, layer = 2)
                 +mutual(same="layer.mem", diff = T)
                 +nodemix(c("layer.mem", "type"), base = -c(12, 67))
                 +duplexdyad(type = letters[c(5 ,6, 7, 8)]),
                 # Settings
                 eval.loglik = T, verbose = T,
                 control = control.ergm(seed = 556914,
                                        MCMC.burnin = 20000,
                                        MCMC.samplesize = 20000,
                                        MCMC.interval = 2000,
                                        parallel = 4),
                 constraints = ~fixallbut(free))

# Model summary output
sink(file = "output/chemg_mod.full.summary.txt")
summary(mod.full)
sink()

# Model fit output
gof.full <- gof(mod.full, control = control.gof.ergm(nsim = 1000, seed = 1180906))
pdf("output/chemg_mod.full.gof.pdf", height = 8, width = 8)
plot(gof.full)
dev.off()

pdf("output/chemg_mod.full.mcmc.pdf", height = 8, width = 8)
mcmc.diagnostics(mod.full)
dev.off()

# Saving ERGM outputs
save(mod.reduced, mod.full, gof.reduced, gof.full, file = "output/chemg_ergm.RData")

#---------------#
# A3. Model Fit #
#---------------#

# Assessing fit of cross-layer terms
dyadfit.observed <- summary(nw ~duplexdyad(type = letters[c(5 ,6, 7, 8)]))
dyadfit.reduced <- simulate(mod.reduced, nsim = 1000, monitor = ~duplexdyad(type = letters[c(5 ,6, 7, 8)]),
                            statsonly = T, seed = 358946)[,c(23:26)]
dyadfit.full <- simulate(mod.full, nsim = 1000,
                         statsonly = T, seed = 71905)[,c(21:24)]

# Output Table 3.
sink(file = "output/chemg_fit.table.txt")
cbind(dyadfit.observed,
      colMeans(dyadfit.reduced), apply(dyadfit.reduced, 2, sd),
      colMeans(dyadfit.full), apply(dyadfit.full, 2, sd))
sink()

# Assess fit of different actor-dyad structures
# Function to count observed dyad census categories
dyad.census.count <- function(nw){
  mat <- as.matrix.network.adjacency(nw)
  nnodes <- nrow(mat)/2
  dyads <- combn(nnodes, 2)
  counts <- rep(NA, ncol(dyads))
  for(i in 1:length(counts)){
    dyad <- c(mat[dyads[1, i], dyads[2, i]],
              mat[dyads[2, i], dyads[1, i]],
              mat[dyads[1, i] + nnodes, dyads[2, i] + nnodes],
              mat[dyads[2, i] + nnodes, dyads[1, i] + nnodes])
    if(sum(dyad) == 4){counts[i] <- 9}
    if(sum(dyad) == 0){counts[i] <- 10}
    if(sum(dyad) == 1){counts[i] <- ifelse(sum(dyad[1:2]) == 1, 1, 2)}
    if(sum(dyad) == 3){counts[i] <- ifelse(sum(dyad[1:2]) == 1, 8, 7)}
    if(sum(dyad) == 2){
      if(sum(dyad[1:2]) == 1){
        counts[i] <- ifelse(dyad[1] == dyad[3], 5, 6)
      }else{
        counts[i] <- ifelse(sum(dyad[1:2]) == 0, 4, 3)
      }
    }
  }
  tabulate(counts)
}

# Function to count dyad census for nsim simulated networks from supplied model
chemg.fit <- function(model, nsim = 1000, seed){
  nw.list<-simulate(model, verbose = F, nsim = nsim, seed = seed)
  t(sapply(nw.list,dyad.census.count))
}

# Running counting functions
observed.census <- dyad.census.count(nw)
mod.full.census <- chemg.fit(mod.full, nsim = 1000, seed = 2558821)
mod.reduced.census <- chemg.fit(mod.reduced, nsim = 1000, seed = 347938)

#--------------#
# A4. Plotting #
#--------------#
# Colors
orng.c<-"#fcae91FF"
purp.c<-"#7828a0FF"
gren.c<-"#46aa96FF"
lgry.c<-"#dcdcdcFF"
mgry.c<-"#8c8c8cFF"
dgry.c<-"#333333FF"

# Plotting model fit results
pdf("output/dyad.census.density.pdf", height = 10.5, width = 20)
par(oma = c(0, 0, 0, 0))
par(omi = c(0.5, 0, 0, 0))
par(mfrow = c(2, 5))
par(mar = c(6, 1, 4, 1))
for(i in 1:10){
  den.full <- density(mod.full.census[, i], bw = "nrd")
  den.reduced <- density(mod.reduced.census[, i], bw = "nrd")
  den.full$y[1] <- 0
  den.reduced$y[1] <- 0
  plot.new()
  plot.window(xlim = range(c(den.full$x, den.reduced$x)),
              ylim = c(0, max(c(den.full$y, den.reduced$y))))
  abline(h = 0)
  abline(v = observed.census[i])
  polygon(den.reduced, col = adjustcolor(orng.c, alpha.f = 0.3), border = dgry.c, lty = 1, lwd = 2)
  polygon(den.full, col = adjustcolor(purp.c, alpha.f = 0.3), border = dgry.c, lty = 5, lwd = 2)
  
  box(); axis(1, cex.axis = 2.5, tick = F, line = 1)
  title(main = c(LETTERS[1:9], 0)[i], cex.main = 4)
}
par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0), mar=c(0, 0, 0, 0), new = T)
plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n")
legend("bottomright", xpd = T, horiz = T, bty = "n", pch = 22, pt.bg = adjustcolor(c(orng.c, purp.c), alpha.f = 0.3), pt.lwd = 0.5,
       lty = c(1, 5), lwd = 2, pt.cex = 8, seg.len = 1,
       legend = c("Layer Independence", "Cross-Layer Dependence"), cex = 3.5)
dev.off()

#---------------------#
# B0. Global Conflict #
#---------------------#

# The global conflict application focuses on the tendency for conflicts to cluster. 
# It is loosely based on Gleditsch, Salehyan, and Schultz (2008), JCR.
# Data obtained from the GSS's replication files, COW, and UCDP.
# Objects ending in .mat are networks in matrix form; node.atts is a data.frame of 733 rows for nodal attributes.
# See pp. 17-26 of main text for details.
# The global conflict portion of the replication code takes approximately 45 hours to run using Amazon Web Service's C5 instances.

#----------------------#
# B1. Data Preparation #
#----------------------#

rm(list = ls())
set.seed(20842)

# Organizing into multilayer structure. Political communication on layer 1 and scientific communication on layer 2.
nw <- cbind(rbind(mid, t(intraconf)), rbind(intraconf, nsconf))

# Into network class
nw <- network(nw, directed = F)
nw%v%"layer.mem" <- c(rep(1, 189), rep(2, 544))
nw%v%"dem" <- nodeatts$dem
nw%v%"cinc" <- nodeatts$cinc
nw%v%"trans" <- nodeatts$trans
nw%v%"interstate" <- nodeatts$interstate
nw%v%"intrastate" <- nodeatts$intrastate
nw%v%"nonstate" <- nodeatts$nonstate

# Constraining the few dyads that never coexist
absent <- matrix(0, ncol = 733, nrow = 733)
absent[1:189, 1:189] <- (1 - intoverlap)
diag(absent) <- 1
absent <- network(absent, directed = F)

#----------#
# B2. ERGM #
#----------#

# Layer Independence Model
mod.reduced <- ergm(nw
                    ~edgecov_layer("edges", layer = 1)
                    +degree_layer(0, layer = 1)
                    +altkstar.fixed_layer(layer = 1)
                    +nodefactor_layer("dem", layer = 1)
                    +nodefactor_layer("intrastate", layer = 1)
                    +nodecov_layer("cinc", layer = 1)
                    +edgecov_layer(demdem, layer = 1)
                    +edgecov_layer(contig, layer = 1)
                    +edgecov_layer(polrel, layer = 1)
                    +edgecov_layer(caprat, layer = 1)
                    +edgecov_layer("edges", layer = c(1, 2))
                    +altkstar.fixed_layer(layer = c(1,2))
                    +nodefactor_layer("dem", layer = c(1, 2))
                    +nodefactor_layer("trans", layer = c(1, 2))
                    +nodefactor_layer("interstate", layer = c(1, 2))
                    +nodefactor_layer("nonstate", layer = c(1, 2))
                    +edgecov_layer("edges", layer = 2)
                    +degree_layer(0, layer = 2)
                    +kstar_layer(2, layer = 2)
                    +altkstar.fixed_layer(layer = 2)
                    +nodefactor_layer("intrastate", layer = 2),
                    # Settings
                    eval.loglik = T, check.degeneracy = T, verbose = T,
                    control = control.ergm(seed = 80672,
                                           MCMC.burnin = 100000,
                                           MCMC.samplesize = 20000,
                                           MCMC.interval = 10000,
                                           parallel = 4),
                    constraints = ~fixedas(absent = absent))

# Model summary output
sink(file = "output/globalconflict_mod.reduced.summary.txt")
summary(mod.reduced)
sink()

# Model fit output
gof.reduced <- gof(mod.reduced, control = control.gof.ergm(nsim = 1000, seed = 1208711))
pdf("output/globalconflict_mod.reduced.gof.pdf", height = 8, width = 8)
plot(gof.reduced)
dev.off()

pdf("output/globalconflict_mod.reduced.mcmc.pdf", height = 8, width = 8)
mcmc.diagnostics(mod.reduced)
dev.off()

#Cross-layer Dependence Model
mod.full <- ergm(nw
                 ~edgecov_layer("edges", layer = 1)
                 +degree_layer(0, layer = 1)
                 +altkstar.fixed_layer(layer = 1)
                 +nodefactor_layer("dem", layer = 1)
                 +nodecov_layer("cinc", layer = 1)
                 +edgecov_layer(demdem, layer = 1)
                 +edgecov_layer(contig, layer = 1)
                 +edgecov_layer(polrel, layer = 1)
                 +edgecov_layer(caprat, layer = 1)
                 +edgecov_layer("edges", layer = c(1, 2))
                 +altkstar.fixed_layer(layer = c(1,2))
                 +nodefactor_layer("dem", layer = c(1, 2))
                 +nodefactor_layer("trans", layer = c(1, 2))
                 +edgecov_layer("edges", layer = 2)
                 +degree_layer(0, layer = 2)
                 +kstar_layer(2, layer = 2)
                 +altkstar.fixed_layer(layer = 2)
                 +altkstar.fixed_crosslayer(lambda = 1, layers = list(1,c(1,2)))
                 +altkstar.fixed_crosslayer(lambda = 1, layers = list(c(1,2),2))
                 +threetrail_crosslayer(layers = list(1,c(1,2),2), incident = 1),
                 # Settings
                 eval.loglik = T, check.degeneracy = T, verbose = T,
                 control = control.ergm(seed = 30748,
                                        MCMC.burnin = 100000,
                                        MCMC.samplesize = 20000,
                                        MCMC.interval = 10000,
                                        parallel = 4),
                 constraints = ~fixedas(absent = absent))

# Model summary output
sink(file = "output/globalconflict_mod.full.summary.txt")
summary(mod.full)
sink()

# Model fit output
gof.full <- gof(mod.full, control = control.gof.ergm(nsim = 1000, seed = 707427))
pdf("output/globalconflict_mod.full.gof.pdf", height = 8, width = 8)
plot(gof.full)
dev.off()

pdf("output/globalconflict_mod.full.mcmc.pdf", hgeight = 8, width = 8)
mcmc.diagnostics(mod.full)
dev.off()

# Saving results
save(mod.reduced, mod.full, gof.reduced, gof.full, file = "output/globalconflict_ergm.RData")

#---------------#
# B3. Model Fit #
#---------------#
#load("output/globalconflict_ergm.RData")

# Assessing model fit to different domestic clustering scenarios
# Function to determine distribution of interstate conflict ties according to domestic clustering
conflict.environment.count <- function(nw){
  # Internal functions
  # Determining the domestic conflict environments of a particular interstate conflict dyad
  strat.environment <- function(interstate, intrastate, nonstate){
    # Is the node in civil conflict?
    ns.conf <- function(c.node, intrastate, nonstate){
      ifelse(sum(nonstate[which(intrastate[c.node,] == 1),]) > 1, 1, 0)
    }
    # Are the civil conflict partners of two countries the same?
    ns.same <- function(interstate, intrastate){
      ifelse(sum(which(intrastate[interstate[1],] == 1) %in% which(intrastate[interstate[2],] == 1)) > 0, 1, 0)
    }
    # Are the civil conflict partners of two countries in conflict with each other?
    ns.joint <- function(interstate, intrastate, nonstate){
      ifelse(sum(which(nonstate[which(intrastate[interstate[1],] == 1), , drop = F] == 1, arr.ind = T)[,2] %in% which(nonstate[which(intrastate[interstate[2],] == 1),, drop = F] == 1, arr.ind = T)[,2]) > 0, 1, 0)
    }
    # Are the countries in civil conflict?
    civ1 <- ifelse(sum(intrastate[interstate[1],]) > 0, 1, 0)
    civ2 <- ifelse(sum(intrastate[interstate[2],]) > 0, 1, 0)
    # If both are not in civil conflict:
    if(civ1 + civ2 == 0){e.type <- 0}else{
      # If only one is in civil conflict:
      if(civ1 + civ2 == 1){
        if(ns.conf(interstate[which(c(civ1, civ2) == 1)], intrastate, nonstate) == 0){
          # If non-state side of civil conflict is not in a nonstate conflict
          e.type <- 1
        }else{
          # If non-state side of civil conflict is in a nonstate conflict
          e.type <- 2
        }
      }else{
        # If both are in civil conflict:
        if(ns.same(interstate, intrastate) == 1){
          # If countries share civil conflict partners:
          e.type <- 6
        }else{
          # If countries do not share civil conflict partners:
          if(ns.conf(interstate[1], intrastate, nonstate) + ns.conf(interstate[2], intrastate, nonstate) == 0){
            # If civil conflict partners are not in nonstate conflicts:
            e.type <- 3
          }else{
            #If civil conflict partners are in nonstate conflicts:
            if(ns.joint(interstate, intrastate, nonstate) == 1){
              # If civil conflict partners are fighting with each other:
              e.type <- 5
            }else{
              # If civil conflict partners are not fighting with each other:
              e.type <- 4
            }
          }
        }
      }
    }
    return(e.type)
  }
  
  mat <- as.matrix.network.adjacency(nw)
  layer.mem <- get.node.attr(nw, "layer.mem")
  n.s <- sum(layer.mem == 1)
  n.ns <- sum(layer.mem == 2)
  dyads <- unique(t(apply(which(mat[1:n.s, 1:n.s] == 1, arr.ind = T), 1, sort)))
  
  counts <- apply(dyads, 1, strat.environment, 
                  intrastate = mat[1:n.s, (n.s + 1):(n.s + n.ns)],
                  nonstate = mat[(n.s + 1):(n.s + n.ns), (n.s + 1):(n.s + n.ns)])
  tabulate(counts+1, 7)
}


# Function to determine interstate clustering distribution for nsim simulated networks from supplied model
globalconflict.fit <- function(model, nsim = 1000, seed){
  nw.list<-simulate(model, verbose = F, nsim = nsim, seed = seed)
  t(sapply(nw.list, conflict.environment.count))
}

# Running counting functions
observed.environments <- conflict.environment.count(nw)
mod.full.environments <- globalconflict.fit(mod.full, nsim = 1000, seed = 20843)
mod.reduced.environments <- globalconflict.fit(mod.reduced, nsim = 1000, seed = 606711)

#--------------#
# B4. Plotting #
#--------------#
# Colors
orng.c<-"#fcae91FF"
purp.c<-"#7828a0FF"
gren.c<-"#46aa96FF"
lgry.c<-"#dcdcdcFF"
mgry.c<-"#8c8c8cFF"
dgry.c<-"#333333FF"

# Plotting results
pdf("output/strat.environ.density.pdf", height = 10, width = 16)
par(oma = c(0, 0, 0, 0))
par(omi = c(0, 0, 0, 0))
par(mfrow = c(2, 4))
par(mar = c(6, 1, 4, 1))
for(i in c(1:7)){
  den.full <- density(mod.full.environments[, i], bw = ifelse(i > 5, "nrd0", "nrd"))
  den.reduced <- density(mod.reduced.environments[, i], bw = ifelse(i > 5, "nrd0", "nrd"))
  den.full$y[1] <- 0
  den.reduced$y[1] <- 0
  plot.new()
  plot.window(xlim = range(c(den.full$x, den.reduced$x, 4)), ylim = c(0, max(c(den.full$y, den.reduced$y))))
  abline(h=0)
  abline(v = observed.environments[i])
  polygon(den.reduced, col = adjustcolor(orng.c, alpha.f = 0.3), border = dgry.c, lty = 1, lwd = 2)
  polygon(den.full, col = adjustcolor(purp.c, alpha.f = 0.3), border = dgry.c, lty = 5, lwd = 2)
  box();axis(1,cex.axis=2.5,tick=F,line=1)
  title(main=c(LETTERS[1:7])[i],cex.main=4)
}
plot.new()
plot.window(xlim=c(0,10),ylim=c(0,10))

legend("topleft", xpd = T, bty = "n", pch = 22, pt.bg = adjustcolor(orng.c, alpha.f = 0.3),
       pt.lwd = 0.5, lty = 1, lwd = 2, pt.cex = 8, seg.len = 1, col = dgry.c,
       legend = "Layer\nIndependence", cex = 3.5)
legend("left", xpd = T, bty = "n", pch = 22, pt.bg = adjustcolor(purp.c, alpha.f = 0.3),
       pt.lwd = 0.5, lty = 5, lwd = 2, pt.cex = 8, seg.len = 1, col = dgry.c,
       legend = "Cross-Layer\nDependence", cex = 3.5)
dev.off()

# End of document
